\documentclass{beamer}
\usecolortheme{beaver}
\beamertemplatenavigationsymbolsempty

% Fonts
\usepackage{fontspec}
\setmainfont{Source Serif Pro}[Ligatures=TeX]
\setsansfont{Source Sans Pro}[Ligatures=TeX]
\setmonofont{Source Code Pro}[
  BoldFont={* Medium},
  BoldItalicFont={* Medium Italic},
]

\usepackage[outputdir=out]{minted}
\usepackage{tikz}
\usetikzlibrary{positioning, fit}

\tikzset{
  invisible/.style={opacity=0,text opacity=0},
  highlight/.style={color=red},
  intro/.code args={<#1>}{%
    \only<#1>{\pgfkeysalso{highlight}}
    \alt<#1->{}{\pgfkeysalso{invisible}}
  },
}

\title{Miri}
\subtitle{An interpreter for Rust's mid-level intermediate representation}
\author{
  Scott Olson
  \texorpdfstring{\\ \scriptsize{Supervisor: Christopher Dutchyn}}{}
}
\institute{
  CMPT 400 \\
  University of Saskatchewan
}
\date{}
\titlegraphic{
  \includegraphics[width=64px,height=64px]{rust-logo-512x512.png} \\
  \scriptsize{\url{https://www.rust-lang.org}}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Intro slides
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{document}

\maketitle

\begin{frame}[fragile]
  \frametitle{What is Rust? \small{[review]}}

  According to the website\dots

  \begin{quote}
    \textbf{Rust} is a systems programming language that runs blazingly fast,
    prevents nearly all segfaults, and guarantees thread safety.
  \end{quote}

  It's a new programming language from Mozilla, and it looks like this:

  \begin{minted}[
    autogobble,
    fontsize=\footnotesize,
    mathescape,
    xleftmargin=.3in,
  ]{rust}
    fn factorial(n: u64) -> u64 {
        (1..n).fold(1, |a, b| a * b)
    }

    fn main() {
        for x in 1..6 {
            println!("{}", factorial(x));
        }
        // $\Rightarrow$ 1
        // $\Rightarrow$ 1
        // $\Rightarrow$ 2
        // $\Rightarrow$ 6
        // $\Rightarrow$ 24
    }
  \end{minted}
\end{frame}

\begin{frame}
  \frametitle{How does Rust compile code? \onslide<-6>{\small{[review]}}}

  \begin{center}
    \begin{tikzpicture}[x=4cm, y=3.5cm, auto, rounded corners]
      \tikzstyle{basic-stage}=[rectangle, draw, thick, align=center]
      \tikzstyle{stage}=[basic-stage, font=\tiny]
      \tikzstyle{pass}=[thick, -stealth]
      \tikzstyle{pass-label}=[font=\footnotesize]

      \node[basic-stage] (src) at (0,0) {Source\\Code};
      \node[basic-stage] (mach) at (2,-1) {Machine\\Code};

      \draw<1>[pass, out=0, in=180]
        (src.east) to node[font=\Huge] {?} (mach.west);

      \node[stage, intro=<2>] (ast) at (1,0)
        {\normalsize{AST} \\ Abstract Syntax Tree};
      \draw[pass, intro=<2>]
        (src) -- node[pass-label] {Parse} (ast);

      \node[stage, intro=<3>] (hir) at (2,0)
        {\normalsize{HIR} \\ High-level Intermediate\\Representation};
      \draw[pass, intro=<3>]
        (ast) -- node[pass-label] {Simplify} (hir);

      \node[stage, intro=<4>] (mir) at (0,-1)
        {\normalsize{MIR} \\ Mid-level Intermediate\\Representation};
      \path (hir.south) -- coordinate (middle) (mir.north);
      \draw[pass, intro=<4>]
        (hir.south) |- (middle) -| (mir.north);
      \node[pass-label, above, intro=<4>] at (middle) {Lower};

      \node[stage, intro=<5>] (llvm) at (1,-1)
        {\normalsize{LLVM IR} \\ Low-level Intermediate\\Representation};
      \draw[pass, intro=<5>]
        (mir) -- node[pass-label] {Translate} (llvm);

      \draw<6->[pass, intro=<6>]
        (llvm) -- node[pass-label] {Magic} (mach);

      \node[stage, intro=<7>] (exec) at (1,-1.75)
        {\normalsize{Execution}};
      \draw[pass, intro=<7>]
        (mach) -- node[pass-label] {CPU} (exec);

      \draw[pass, intro=<8>]
        (mir) -- node[pass-label] {Miri} (exec);
    \end{tikzpicture}
  \end{center}
\end{frame}

\begin{frame}
  \frametitle{Why build Miri?}
  \begin{itemize}
    \item For fun and learning.

    \item I originally planned to use it for testing the compiler and execution
      of unsafe code, but shifted my goals along the way. \pause

    \item Now it serves as an experimental implementation of the upcoming
      compile-time function evaluation feature in Rust. \pause

      \begin{itemize}
        \item Similar to C++14's \mintinline{cpp}{constexpr} feature.

        \item You can do complicated calculations at compile time and compile
          their \emph{results} into the executable. \pause

        \item For example, you can compute a ``perfect hash function'' for a
          statically-known map at compile-time and have guaranteed no-collision
          lookup at runtime. \pause

        \item Miri actually supports far more of Rust than C++14's
          \mintinline{cpp}{constexpr} does of C++ --- even heap allocation and
          unsafe code.
      \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{How was it built?}

  At first I wrote a naive version with a number of downsides:

  \begin{itemize}
    \item represented values in a traditional dynamic language format, where
      every value was the same size.

    \item didn't work well for aggregates (structs, enums, arrays, etc.).

    \item made unsafe programming tricks that make assumptions about low-level
      value layout essentially impossible.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{How was it built?}
  \begin{itemize}
    \item Later, a Rust compiler team member proposed a ``Rust abstract
      machine'' with specialized value layout which solved my previous problems.
      \pause

    \item His proposal was intended for a compile-time function evaluator in the
      Rust compiler, so I effectively implemented an experimental version of
      that. \pause

    \item After this point, making Miri work well was primarily a software
      engineering problem.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Data layout}
  \begin{itemize}
    \item Memory in Miri is literally a HashMap from ``allocation IDs'' to
      ``abstract allocations''.

    \item Allocations are represented by: \pause
      \begin{enumerate}
        \item An array of \textbf{raw bytes} with a size based on the type of
          the value \pause
        \item A set of \textbf{relocations} --- pointers into other abstract
          allocations \pause
        \item A mask determining which bytes are \textbf{undefined}
      \end{enumerate}
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{\texttt{square} example}
  \begin{center}
    \begin{minted}[autogobble,fontsize=\scriptsize]{rust}
      // Rust
      fn square(n: u64) -> u64 {
          n * n
      }

      // Generated MIR
      fn square(arg0: u64) -> u64 {
          let var0: u64; // n           // On function entry, Miri creates
                                        // virtual allocations for all the
                                        // arguments, variables, and
                                        // temporaries.

          bb0: {
              var0 = arg0;              // Copy the argument into `n`.
              return = Mul(var0, var0); // Multiply `n` with itself.
              goto -> bb1;              // Jump to basic block `bb1`.
          }

          bb1: {
              return;                   // Return from the current fn.
          }
      }
    \end{minted}
  \end{center}
\end{frame}

\begin{frame}[fragile]
  \frametitle{\texttt{sum} example}
  \begin{center}
    \begin{minted}[autogobble,fontsize=\tiny]{rust}
      // Rust
      fn sum() -> u64 {
          let mut sum = 0; let mut i = 0;
          while i < 10 { sum += i; i += 1; }
          sum
      }

      // Generated MIR
      fn sum() -> u64 {
          let mut var0: u64; // sum
          let mut var1: u64; // i
          let mut tmp0: bool;

          bb0: {
              // sum = 0; i = 0;
              var0 = const 0u64; var1 = const 0u64; goto -> bb1;
          }
          bb1: {
              // if i < 10 { goto bb2; } else { goto bb3; }
              tmp0 = Lt(var1, const 10u64);
              if(tmp0) -> [true: bb2, false: bb3];
          }
          bb2: {
              var0 = Add(var0, var1);       // sum = sum + i;
              var1 = Add(var1, const 1u64); // i = i + 1;
              goto -> bb1;
          }
          bb3: {
              return = var0; goto -> bb4;
          }
          bb4: { return; }
      }
    \end{minted}
  \end{center}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Heap allocations!}
  \begin{minted}[autogobble,fontsize=\scriptsize]{rust}
    fn make_vec() -> Vec<u8> {
        // Empty array with space for 4 bytes - allocated on the heap!
        let mut vec = Vec::with_capacity(4);
        // Initialize the first two slots.
        vec.push(1);
        vec.push(2);
        vec
    }

    // For reference:
    //   struct Vec<T> { capacity: usize, data: *mut T, length: usize }

    // Resulting allocations (on 32-bit little-endian architectures):
    //   Region A:
    //     04 00 00 00  00 00 00 00  02 00 00 00
    //                  └───(B)───┘
    //
    //   Region B:
    //     01 02 __ __ (underscores denote undefined bytes)
  \end{minted}

  \footnotesize{Evaluating the above involves a number of compiler built-ins,
  ``unsafe'' code blocks, and more inside the standard library,
  but Miri handles it all.}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Unsafe code!}
  \begin{minted}[autogobble,fontsize=\scriptsize]{rust}
    fn out_of_bounds() -> u8 {
        let mut vec = vec![1, 2]
        unsafe { *vec.get_unchecked(5) }
    }

    // test.rs:3: error: pointer offset outside bounds of allocation
    // test.rs:3:     unsafe { *vec.get_unchecked(5) }
    //                         ^~~~~~~~~~~~~~~~~~~~~

    fn undefined_bytes() -> u8 {
        let mut vec = Vec::with_capacity(10);
        unsafe { *vec.get_unchecked(5) }
    }

    // test.rs:3: error: attempted to read undefined bytes
    // test.rs:3:     unsafe { *vec.get_unchecked(5) }
    //                         ^~~~~~~~~~~~~~~~~~~~~
  \end{minted}
\end{frame}

\begin{frame}
  \frametitle{What can't Miri do?}
  \begin{itemize}
    \item Miri can't do all the stuff I didn't implement yet. :)
      \begin{itemize}
        \item non-trivial casts
        \item function pointers
        \item calling destructors and freeing memory
        \item taking target architecture endianess and alignment information
          into account when computing data layout
        \item handling all constants properly (but, well, Miri might be
          replacing the old constants system)
      \end{itemize}
      \pause

    \item Miri can't do foreign function calls (e.g. calling functions defined
      in C or C++), but there is a reasonable way it could be done with libffi.
      \begin{itemize}
        \item On the other hand, for constant evaluation in the compiler, you
          want the evaluator to be deterministic and safe, so FFI calls might be
          banned anyway.
      \end{itemize}
      \pause

    \item Without quite some effort, Miri will probably never handle inline
      assembly...
  \end{itemize}
\end{frame}

\begin{frame}
  \begin{center}
    \LARGE{Questions?}
  \end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Extra slides
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}[fragile]
  \frametitle{\texttt{varN} vs. \texttt{argN}}
  \begin{center}
    \begin{minted}[autogobble,fontsize=\scriptsize]{rust}
      // Rust
      type Pair = (u64, u64);
      fn swap((a, b): Pair) -> Pair {
          (b, a)
      }

      // Generated MIR
      fn swap(arg0: (u64, u64)) -> (u64, u64) {
          let var0: u64; // a
          let var1: u64; // b

          bb0: {
              var0 = arg0.0;         // get the 1st part of the pair
              var1 = arg0.1;         // get the 2nd part of the pair
              return = (var1, var0); // build a new pair in the result
              goto -> bb1;
          }

          bb1: {
              return;
          }
      }
    \end{minted}
  \end{center}
\end{frame}

\begin{frame}[fragile]
  \frametitle{\texttt{factorial} example}
  \begin{center}
    \begin{minted}[autogobble,fontsize=\tiny]{rust}
      // Rust
      fn factorial(n: u64) -> u64 {
          (1..n).fold(1, |a, b| a * b)
      }

      // Generated MIR
      fn factorial(arg0: u64) -> u64 {
          let var0: u64; // n
          let mut tmp0: Range<u64>; // Miri calculates sizes for generics like Range<u64>.
          let mut tmp1: [closure];

          bb0: {
              var0 = arg0;

              // tmp0 = 1..n
              tmp0 = Range<u64> { start: const 1u64, end: var0 };

              // tmp1 = |a, b| a * b
              tmp1 = [closure];

              // This loads the MIR for the `fold` fn from the standard library.
              // In general, MIR for any function from any library can be loaded.
              // return tmp0.fold(1, tmp1)
              return = Range<u64>::fold(tmp0, const 1u64, tmp1) -> bb1;
          }

          bb1: {
              return;
          }
      }
    \end{minted}
  \end{center}
\end{frame}

\end{document}
